/**********************************************************************
  Copyright(c) 2022-2023 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include "include/ipsec_ooo_mgr.h"
#include "include/zuc_internal.h"
#include <assert.h>
#ifdef SAFE_PARAM
#include "error.h"
#endif
#include "clear_regs_mem_aarch64.h"

/*
 * Default symbol names for this translation unit.
 * Unless SUBMIT_JOB_ZUC128_EEA3 has already been defined before this point,
 * the generic submit/flush entry-point names and the low-level ZUC routine
 * names used below are mapped to their *_aarch64 symbols.
 */
#ifndef SUBMIT_JOB_ZUC128_EEA3
#define SUBMIT_JOB_ZUC128_EEA3    submit_job_zuc_eea3_aarch64
#define SUBMIT_JOB_ZUC256_EEA3    submit_job_zuc256_eea3_aarch64
#define FLUSH_JOB_ZUC128_EEA3     flush_job_zuc_eea3_aarch64
#define FLUSH_JOB_ZUC256_EEA3     flush_job_zuc256_eea3_aarch64
#define SUBMIT_JOB_ZUC128_EIA3    submit_job_zuc_eia3_aarch64
#define SUBMIT_JOB_ZUC256_EIA3    submit_job_zuc256_eia3_aarch64
#define FLUSH_JOB_ZUC128_EIA3     flush_job_zuc_eia3_aarch64
#define FLUSH_JOB_ZUC256_EIA3     flush_job_zuc256_eia3_aarch64
/* Low-level routines invoked by the job manager functions in this file */
#define	ZUC_EIA3_4_BUFFER         zuc_eia3_4_buffer_job_aarch64
#define	ZUC256_EIA3_4_BUFFER      zuc256_eia3_4_buffer_job_aarch64
#define	ZUC128_INIT_4             asm_ZucInitialization_4_aarch64
#define	ZUC256_INIT_4             asm_Zuc256Initialization_4_aarch64
#define	ZUC_CIPHER_4              asm_ZucCipher_4_aarch64
#endif

/* Number of lanes handled together by the job manager in this file */
#define ZUC_MB_MAX_LANES_SIMD    4
/*
 * ZUC state (LFSR (16) + X0-X3 (4) + R1-R2 (2)).
 * Parenthesized so the sum survives use inside larger expressions
 * (e.g. ZUC_STATE_LENGTH * n).
 */
#define ZUC_STATE_LENGTH         (16 + 4 + 2)
/* True when lane i holds a job and that lane's remaining length is zero */
#define JOB_IS_COMPLETED(state, i)  \
        ((((state)->job_in_lane[(i)]) != NULL) && ((state)->lens[(i)] == 0))
/* Non-zero when bit i of the init_not_done bitmask is set */
#define JOB_NOT_INITIALIZED(state, i) \
        (((state)->init_not_done) & (1 << (i)))
/* True when lane i holds no job */
#define JOB_IS_NULL(state, i) \
        ((state)->job_in_lane[(i)] == NULL)

/*
 * Prototypes of the externally visible submit/flush entry points defined
 * later in this file. The EEA3 functions drive the cipher path and the EIA3
 * functions drive the authentication path; the ZUC-256 EIA3 variants take an
 * additional tag_sz argument that is forwarded to the EIA3 routine.
 */
IMB_JOB *SUBMIT_JOB_ZUC128_EEA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job);
IMB_JOB *SUBMIT_JOB_ZUC256_EEA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job);
IMB_JOB *FLUSH_JOB_ZUC128_EEA3(MB_MGR_ZUC_OOO *state);
IMB_JOB *FLUSH_JOB_ZUC256_EEA3(MB_MGR_ZUC_OOO *state);
IMB_JOB *SUBMIT_JOB_ZUC128_EIA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job);
IMB_JOB *SUBMIT_JOB_ZUC256_EIA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job, const uint64_t tag_sz);
IMB_JOB *FLUSH_JOB_ZUC128_EIA3(MB_MGR_ZUC_OOO *state);
IMB_JOB *FLUSH_JOB_ZUC256_EIA3(MB_MGR_ZUC_OOO *state, const uint64_t tag_sz);

/*
 * Selects which ZUC variant a helper in this file should run: it decides
 * how the IV is copied into the lane and which initialization / EIA3
 * routine is called.
 */
typedef enum {
    ZUC_128,
    ZUC_256
} ZUC_TYPE;

/*
 * Read 8*6 bits and store them as 8 partial bytes
 * (using the 6 least significant bits of each output byte).
 *
 * The 6 input bytes are treated as one 48-bit value with pInput[0] holding
 * the most significant bits; pOutput[0] receives the top 6 bits and
 * pOutput[7] the bottom 6 bits.
 *
 * The 48-bit value is assembled with shifts rather than by reading a byte
 * array through a uint64_t pointer, so the result does not depend on host
 * byte order and no aliasing or alignment rule is violated.
 *
 * @param pOutput destination, 8 bytes are written
 * @param pInput  source, 6 bytes are read
 */
static void expand_from_6_to_8_bytes(uint8_t *pOutput, const uint8_t *pInput)
{
    uint64_t bits48 = 0;
    int i;

    // accumulate the 6 input bytes, most significant byte first
    for (i = 0; i < 6; i++)
        bits48 = (bits48 << 8) | pInput[i];

    // peel off eight 6-bit groups, starting from bits 47..42
    for (i = 0; i < 8; i++)
        pOutput[i] = (uint8_t)((bits48 >> (42 - 6 * i)) & 0x3f);
}

/*
 * Place an EEA3 (cipher) job into the next free lane.
 *
 * The lane index is taken from the low byte of state->unused_lanes, which is
 * then shifted down by one byte. The job's IV is copied into the lane's
 * 32-byte slot of state->args.iv:
 *   - ZUC_128: 16 bytes are copied;
 *   - otherwise, when iv_len_in_bytes is 25: 25 bytes are copied;
 *   - otherwise: 17 bytes are copied and the following 6 bytes are expanded
 *     into 8 bytes holding 6 bits each.
 * The lane is flagged as not yet initialized and removed from the
 * unused-lane bitmask; source, key, destination and length are recorded.
 *
 * @param state multi-buffer manager to update
 * @param job   job to insert
 * @param zuc   ZUC variant, controls the IV handling
 */
static void zuc_mb_mgr_insert_eea3_job(MB_MGR_ZUC_OOO *state, IMB_JOB *job, ZUC_TYPE zuc)
{
    const uint64_t lane = state->unused_lanes & 0xff;
    assert(lane < ZUC_MB_MAX_LANES_SIMD);

    // consume the lane index just read
    state->unused_lanes >>= 8;

    // start of this lane's slot in the IV buffer
    uint8_t *const lane_iv = state->args.iv + lane * 32;

    if (zuc != ZUC_128 && job->iv_len_in_bytes != 25) {
        // first 17 bytes are used as they are
        memcpy(lane_iv, job->iv, 17);
        // remaining 6 bytes are spread over 8 bytes
        expand_from_6_to_8_bytes(lane_iv + 17, job->iv + 17);
    } else {
        const size_t iv_bytes = (zuc == ZUC_128) ? 16 : 25;

        memcpy(lane_iv, job->iv, iv_bytes);
    }

    // lane bookkeeping
    state->job_in_lane[lane] = job;
    state->init_not_done |= 1 << lane;
    state->unused_lane_bitmask &= ~(1 << lane);

    // per-lane arguments
    state->args.in[lane] = job->src + job->cipher_start_src_offset_in_bytes;
    state->args.keys[lane] = job->enc_keys;
    state->args.out[lane] = job->dst;
    state->lens[lane] = job->msg_len_to_cipher_in_bytes;
}

/*
 * Place an EIA3 (authentication) job into the next free lane.
 *
 * The lane index is taken from the low byte of state->unused_lanes, which is
 * then shifted down by one byte. The IV is copied into the lane's 32-byte
 * slot of state->args.iv:
 *   - ZUC_128: 16 bytes from u.ZUC_EIA3._iv;
 *   - otherwise, when u.ZUC_EIA3._iv is non-NULL: 25 bytes from it;
 *   - otherwise: 17 bytes from u.ZUC_EIA3._iv23, followed by its next
 *     6 bytes expanded into 8 bytes holding 6 bits each.
 * The lane is flagged as not yet initialized and removed from the
 * unused-lane bitmask; source, key, tag output and bit length are recorded.
 *
 * @param state multi-buffer manager to update
 * @param job   job to insert
 * @param zuc   ZUC variant, controls the IV handling
 */
static void zuc_mb_mgr_insert_eia3_job(MB_MGR_ZUC_OOO *state, IMB_JOB *job, ZUC_TYPE zuc)
{
    const uint64_t lane = state->unused_lanes & 0xff;
    assert(lane < ZUC_MB_MAX_LANES_SIMD);

    // consume the lane index just read
    state->unused_lanes >>= 8;

    // start of this lane's slot in the IV buffer
    uint8_t *const lane_iv = state->args.iv + lane * 32;

    if (zuc != ZUC_128 && job->u.ZUC_EIA3._iv == NULL) {
        // first 17 bytes are used as they are
        memcpy(lane_iv, job->u.ZUC_EIA3._iv23, 17);
        // remaining 6 bytes are spread over 8 bytes
        expand_from_6_to_8_bytes(lane_iv + 17, job->u.ZUC_EIA3._iv23 + 17);
    } else {
        const size_t iv_bytes = (zuc == ZUC_128) ? 16 : 25;

        memcpy(lane_iv, job->u.ZUC_EIA3._iv, iv_bytes);
    }

    // lane bookkeeping
    state->job_in_lane[lane] = job;
    state->init_not_done |= 1 << lane;
    state->unused_lane_bitmask &= ~(1 << lane);

    // per-lane arguments
    state->args.in[lane] = job->src + job->hash_start_src_offset_in_bytes;
    state->args.keys[lane] = job->u.ZUC_EIA3._key;
    state->args.out[lane] = job->auth_tag_output;
    state->lens[lane] = job->msg_len_to_hash_in_bits;
}

/*
 * Release the first completed EEA3 job, if there is one.
 *
 * Lanes are scanned in ascending order. The first lane that holds a job
 * whose remaining length is zero is emptied: the job is marked with
 * IMB_STATUS_COMPLETED_CIPHER, the lane index is pushed back onto
 * state->unused_lanes and its bit is set in state->unused_lane_bitmask.
 * With SAFE_DATA, that lane's 16 + 2 state words are zeroed.
 *
 * @param state multi-buffer manager to scan
 * @return the completed job, or NULL when no lane holds a completed job
 */
static IMB_JOB *zuc_mb_mgr_free_eea3_job(MB_MGR_ZUC_OOO *state)
{
    IMB_JOB *ret = NULL;

    /* Valid lane indices are 0 .. ZUC_MB_MAX_LANES_SIMD - 1 (exclusive bound) */
    for (int i = 0; i < ZUC_MB_MAX_LANES_SIMD; i++)
    {
        if (JOB_IS_COMPLETED(state, i))
        {
            ret = state->job_in_lane[i];
            state->job_in_lane[i] = NULL;
            ret->status |= IMB_STATUS_COMPLETED_CIPHER;
            /* push the lane index back as the next lane to hand out */
            state->unused_lanes = state->unused_lanes << 8;
            state->unused_lanes |= i;
            state->unused_lane_bitmask |= (1 << i);
#ifdef SAFE_DATA
            /* state words are interleaved by lane: word j of lane i is at 4*j + i */
            for (int j = 0; j < 16 + 2; j++)
                state->state[4*j + i] = 0;
#endif
            break;
        }
    }

    return ret;
}

/*
 * Release the first completed EIA3 job, if there is one.
 *
 * Lanes are scanned in ascending order. The first lane that holds a job
 * whose remaining length is zero is emptied: the job is marked with
 * IMB_STATUS_COMPLETED_AUTH, the lane length is set to 0xffffffff, the lane
 * index is pushed back onto state->unused_lanes and its bit is set in
 * state->unused_lane_bitmask. With SAFE_DATA, that lane's 16 + 2 state
 * words are zeroed.
 *
 * @param state multi-buffer manager to scan
 * @return the completed job, or NULL when no lane holds a completed job
 */
static IMB_JOB *zuc_mb_mgr_free_eia3_job(MB_MGR_ZUC_OOO *state)
{
    IMB_JOB *ret = NULL;

    /* Valid lane indices are 0 .. ZUC_MB_MAX_LANES_SIMD - 1 (exclusive bound) */
    for (int i = 0; i < ZUC_MB_MAX_LANES_SIMD; i++)
    {
        if (JOB_IS_COMPLETED(state, i))
        {
            ret = state->job_in_lane[i];
            state->job_in_lane[i] = NULL;
            ret->status |= IMB_STATUS_COMPLETED_AUTH;
            /* an empty lane must no longer look completed (length == 0) */
            state->lens[i] = 0xffffffff;
            /* push the lane index back as the next lane to hand out */
            state->unused_lanes = state->unused_lanes << 8;
            state->unused_lanes |= i;
            state->unused_lane_bitmask |= (1 << i);
#ifdef SAFE_DATA
            /* state words are interleaved by lane: word j of lane i is at 4*j + i */
            for (int j = 0; j < 16 + 2; j++)
                state->state[4*j + i] = 0;
#endif
            break;
        }
    }

    return ret;
}

/*
 * Submit an EEA3 (cipher) job to the multi-buffer manager.
 *
 * With SAFE_PARAM, the job is validated first (key, IV, source, destination,
 * cipher length and, for ZUC_256, an IV length of 23 or 25); on failure the
 * matching error is recorded through imb_set_errno() and NULL is returned.
 *
 * The job is then inserted into a free lane. Nothing is processed until
 * state->unused_lanes equals 0xff; until then NULL is returned. Once that
 * point is reached, an already completed job is returned if one exists;
 * otherwise all lanes are initialized into a temporary state, the state of
 * lanes flagged in init_not_done is copied into the manager, the cipher
 * routine is run for the shortest lane length, and a completed job (if any)
 * is returned.
 *
 * @param state multi-buffer manager
 * @param job   job to submit
 * @param zuc   ZUC variant
 * @return a completed job, or NULL if none is available (or on a
 *         SAFE_PARAM validation failure)
 */
static IMB_JOB *zuc_mb_mgr_submit_eea3_job(MB_MGR_ZUC_OOO *state,
                                           IMB_JOB *job,
                                           ZUC_TYPE zuc)
{
#ifdef SAFE_PARAM
    int rejected = 1;
    int err = 0;

    /* start from a clean error status */
    if (imb_errno != 0)
        imb_set_errno(NULL, 0);

    /* the first failing check decides which error is reported */
    if (job->enc_keys == NULL)
        err = IMB_ERR_NULL_EXP_KEY;
    else if (job->iv == NULL)
        err = IMB_ERR_NULL_IV;
    else if (job->src == NULL)
        err = IMB_ERR_NULL_SRC;
    else if (job->dst == NULL)
        err = IMB_ERR_NULL_DST;
    else if ((job->msg_len_to_cipher_in_bytes == 0) ||
             (job->msg_len_to_cipher_in_bytes > ZUC_MAX_BYTELEN))
        err = IMB_ERR_CIPH_LEN;
    else if (zuc == ZUC_256 &&
             job->iv_len_in_bytes != 23 && job->iv_len_in_bytes != 25)
        err = IMB_ERR_IV_LEN;
    else
        rejected = 0;

    if (rejected) {
        imb_set_errno(NULL, err);
        return NULL;
    }
#endif
    uint32_t fresh_state[MAX_ZUC_STATE_SZ] = {0};
    IMB_JOB *done;

    zuc_mb_mgr_insert_eea3_job(state, job, zuc);

    // processing only starts once unused_lanes has been drained down to 0xff
    if (state->unused_lanes != 0xff)
        return NULL;

    // hand back a job that is already complete before doing more work
    done = zuc_mb_mgr_free_eea3_job(state);
    if (done != NULL)
        return done;

    // shortest remaining length across the lanes
    uint32_t shortest = state->lens[0];

    for (int lane = 1; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (state->lens[lane] < shortest)
            shortest = state->lens[lane];
    }

    // NOTE(review): the trailing 2 passed to the ZUC-256 init routine is
    // defined by that routine - confirm its meaning there
    if (zuc == ZUC_128) {
        ZUC128_INIT_4((ZucKey4_t *)state->args.keys,
                      (const uint8_t *)state->args.iv,
                      (ZucState4_t *)&fresh_state[0]);
    } else {
        ZUC256_INIT_4((ZucKey4_t *)state->args.keys,
                      (const uint8_t *)state->args.iv,
                      (ZucState4_t *)&fresh_state[0], 2);
    }

    // only lanes still flagged as uninitialized take the new state;
    // word w of a lane lives at index 4*w + lane
    for (int lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (!JOB_NOT_INITIALIZED(state, lane))
            continue;

        for (int w = 0; w < 16 + 2; w++)
            state->state[4*w + lane] = fresh_state[4*w + lane];
    }

    // every lane is initialized from here on
    state->init_not_done = 0;

    ZUC_CIPHER_4((ZucState4_t *)&state->state[0],
                 (const uint64_t **)state->args.in,
                 (uint64_t **)state->args.out,
                 &state->lens[0], shortest);

    done = zuc_mb_mgr_free_eea3_job(state);

#ifdef SAFE_DATA
    memset(fresh_state, 0, sizeof(fresh_state));
    /* Clear sensitive data in registers */
    CLEAR_SCRATCH_GPS();
    CLEAR_SCRATCH_SIMD_REGS();
#endif

    return done;
}

/*
 * Flush one EEA3 (cipher) job out of the multi-buffer manager.
 *
 * Returns NULL when state->unused_lanes >> 39 is non-zero (treated as "no
 * job queued"). Otherwise empty lanes get length 0xffffffff and an already
 * completed job is returned if one exists. If not, the lane with the
 * shortest length is chosen as the donor: its in/out/key pointers are
 * copied into the empty lanes, pending lanes are initialized, the donor's
 * state is copied into the empty lanes, the cipher routine is run for the
 * shortest length and a completed job (if any) is returned.
 *
 * With SAFE_DATA, the state of empty lanes and the temporary state are
 * zeroed and scratch registers are cleared before returning.
 *
 * @param state multi-buffer manager
 * @param zuc   ZUC variant
 * @return a completed job, or NULL if none is available
 */
static IMB_JOB *zuc_mb_mgr_flush_eea3_job(MB_MGR_ZUC_OOO *state, ZUC_TYPE zuc)
{
    uint32_t fresh_state[MAX_ZUC_STATE_SZ] = {0};
    uint32_t lane, w;
    uint32_t shortest;
    uint32_t donor = 0;
    IMB_JOB *done;

    // nothing queued
    if (state->unused_lanes >> 39)
        return NULL;

    // empty lanes must never win the shortest-length search
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (JOB_IS_NULL(state, lane))
            state->lens[lane] = 0xffffffff;
    }

    done = zuc_mb_mgr_free_eea3_job(state);
    if (done != NULL)
        return done;

    // find the lane with the shortest length (lowest index wins ties)
    shortest = state->lens[0];
    for (lane = 1; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (state->lens[lane] < shortest) {
            shortest = state->lens[lane];
            donor = lane;
        }
    }

    // empty lanes borrow the donor's pointers; the IV slot is not copied
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (!JOB_IS_NULL(state, lane))
            continue;

        state->args.in[lane] = state->args.in[donor];
        state->args.out[lane] = state->args.out[donor];
        state->args.keys[lane] = state->args.keys[donor];
    }

    // initialize lanes whose job has not been initialized yet
    if (state->init_not_done != 0) {
        // NOTE(review): the trailing 2 passed to the ZUC-256 init routine is
        // defined by that routine - confirm its meaning there
        if (zuc == ZUC_128) {
            ZUC128_INIT_4((ZucKey4_t *)state->args.keys,
                          (const uint8_t *)state->args.iv,
                          (ZucState4_t *)&fresh_state[0]);
        } else {
            ZUC256_INIT_4((ZucKey4_t *)state->args.keys,
                          (const uint8_t *)state->args.iv,
                          (ZucState4_t *)&fresh_state[0], 2);
        }

        // only flagged lanes take the new state (word w at 4*w + lane)
        for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
            if (!JOB_NOT_INITIALIZED(state, lane))
                continue;

            for (w = 0; w < 16 + 2; w++)
                state->state[4*w + lane] = fresh_state[4*w + lane];
        }

        // every lane is initialized from here on
        state->init_not_done = 0;
    }

    // empty lanes also borrow the donor's state
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (!JOB_IS_NULL(state, lane))
            continue;

        for (w = 0; w < 16 + 2; w++)
            state->state[4*w + lane] = state->state[4*w + donor];
    }

    ZUC_CIPHER_4((ZucState4_t *)&state->state[0],
                 (const uint64_t **)state->args.in,
                 (uint64_t **)state->args.out,
                 &state->lens[0], shortest);

    done = zuc_mb_mgr_free_eea3_job(state);

#ifdef SAFE_DATA
    // wipe the state left in lanes that hold no job
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (!JOB_IS_NULL(state, lane))
            continue;

        for (w = 0; w < 16 + 2; w++)
            state->state[4*w + lane] = 0;
    }

    memset(fresh_state, 0, sizeof(fresh_state));

    /* Clear sensitive data in registers */
    CLEAR_SCRATCH_GPS();
    CLEAR_SCRATCH_SIMD_REGS();
#endif

    return done;
}

/*
 * Submit an EIA3 (authentication) job to the multi-buffer manager.
 *
 * With SAFE_PARAM, the job is validated first (key, source, tag output,
 * hash length in bits, and IV: for ZUC_256 either _iv or _iv23 must be set,
 * otherwise _iv must be set); on failure the matching error is recorded
 * through imb_set_errno() and NULL is returned.
 *
 * The job is then inserted into a free lane. Nothing is processed until
 * state->unused_lanes equals 0xff; until then NULL is returned. Once that
 * point is reached, an already completed job is returned if one exists;
 * otherwise the EIA3 routine is run over all lanes, every lane length is
 * set to zero and one completed job is returned.
 *
 * @param state  multi-buffer manager
 * @param job    job to submit
 * @param zuc    ZUC variant
 * @param tag_sz forwarded to the ZUC-256 EIA3 routine; unused for ZUC_128
 * @return a completed job, or NULL if none is available (or on a
 *         SAFE_PARAM validation failure)
 */
static IMB_JOB *zuc_mb_mgr_submit_eia3_job(MB_MGR_ZUC_OOO *state,
                                           IMB_JOB *job,
                                           ZUC_TYPE zuc,
                                           const uint64_t tag_sz)
{
#ifdef SAFE_PARAM
    int rejected = 1;
    int err = 0;

    /* start from a clean error status */
    if (imb_errno != 0)
        imb_set_errno(NULL, 0);

    /* the first failing check decides which error is reported */
    if (job->u.ZUC_EIA3._key == NULL)
        err = IMB_ERR_NULL_EXP_KEY;
    else if (job->src == NULL)
        err = IMB_ERR_NULL_SRC;
    else if (job->auth_tag_output == NULL)
        err = IMB_ERR_NULL_AUTH;
    else if ((job->msg_len_to_hash_in_bits == 0) ||
             (job->msg_len_to_hash_in_bits > ZUC_MAX_BITLEN))
        err = IMB_ERR_AUTH_LEN;
    else if (job->u.ZUC_EIA3._iv == NULL &&
             (zuc != ZUC_256 || job->u.ZUC_EIA3._iv23 == NULL))
        err = IMB_ERR_NULL_IV;
    else
        rejected = 0;

    if (rejected) {
        imb_set_errno(NULL, err);
        return NULL;
    }
#endif

    IMB_JOB *done;
    unsigned int lane;

    zuc_mb_mgr_insert_eia3_job(state, job, zuc);

    // processing only starts once unused_lanes has been drained down to 0xff
    if (state->unused_lanes != 0xff)
        return NULL;

    // hand back a job that is already complete before doing more work
    done = zuc_mb_mgr_free_eia3_job(state);
    if (done != NULL)
        return done;

    if (zuc == ZUC_128) {
        ZUC_EIA3_4_BUFFER((const void * const *)state->args.keys,
                          (const uint8_t *)state->args.iv,
                          (const void * const *)state->args.in,
                          (uint32_t **)state->args.out,
                          state->lens,
                          (const void * const *)state->job_in_lane);
    } else {
        ZUC256_EIA3_4_BUFFER((const void * const *)state->args.keys,
                             (const uint8_t *)state->args.iv,
                             (const void * const *)state->args.in,
                             (uint32_t **)state->args.out,
                             state->lens,
                             (const void * const *)state->job_in_lane,
                             tag_sz);
    }

    // the routine above authenticates every buffer, so zero every length
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++)
        state->lens[lane] = 0;

    done = zuc_mb_mgr_free_eia3_job(state);

    return done;
}

/*
 * Flush one EIA3 (authentication) job out of the multi-buffer manager.
 *
 * Returns NULL when state->unused_lanes >> 39 is non-zero (treated as "no
 * job queued"). Otherwise an already completed job is returned if one
 * exists. If not, empty lanes get length 0xffffffff, the lane with the
 * shortest length is chosen as the donor and its in/out/key pointers and
 * length are copied into the empty lanes. The EIA3 routine is then run,
 * lengths are set to 0 for lanes holding a job and 0xffffffff for empty
 * lanes, and one completed job is returned.
 *
 * @param state  multi-buffer manager
 * @param key    ZUC variant
 * @param tag_sz forwarded to the ZUC-256 EIA3 routine; unused for ZUC_128
 * @return a completed job, or NULL if none is available
 */
static IMB_JOB *zuc_mb_mgr_flush_eia3_job(MB_MGR_ZUC_OOO *state,
                                          ZUC_TYPE key,
                                          const uint64_t tag_sz)
{
    uint32_t lane;
    uint32_t shortest;
    uint32_t donor = 0;
    IMB_JOB *done;

    // nothing queued
    if (state->unused_lanes >> 39)
        return NULL;

    done = zuc_mb_mgr_free_eia3_job(state);
    if (done != NULL)
        return done;

    // empty lanes must never win the shortest-length search
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (JOB_IS_NULL(state, lane))
            state->lens[lane] = 0xffffffff;
    }

    // find the lane with the shortest length (lowest index wins ties)
    shortest = state->lens[0];
    for (lane = 1; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (state->lens[lane] < shortest) {
            shortest = state->lens[lane];
            donor = lane;
        }
    }

    // empty lanes borrow the donor's pointers and length; the IV slot is not copied
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++) {
        if (!JOB_IS_NULL(state, lane))
            continue;

        state->args.in[lane] = state->args.in[donor];
        state->args.out[lane] = state->args.out[donor];
        state->args.keys[lane] = state->args.keys[donor];
        state->lens[lane] = state->lens[donor];
    }

    if (key == ZUC_128) {
        ZUC_EIA3_4_BUFFER((const void * const *)state->args.keys,
                          (const uint8_t *)state->args.iv,
                          (const void * const *)state->args.in,
                          (uint32_t **)state->args.out,
                          state->lens,
                          (const void * const *)state->job_in_lane);
    } else {
        ZUC256_EIA3_4_BUFFER((const void * const *)state->args.keys,
                             (const uint8_t *)state->args.iv,
                             (const void * const *)state->args.in,
                             (uint32_t **)state->args.out,
                             state->lens,
                             (const void * const *)state->job_in_lane,
                             tag_sz);
    }

    // lanes with a job are now done (0); empty lanes go back to 0xffffffff
    for (lane = 0; lane < ZUC_MB_MAX_LANES_SIMD; lane++)
        state->lens[lane] = JOB_IS_NULL(state, lane) ? 0xffffffff : 0;

    done = zuc_mb_mgr_free_eia3_job(state);

    return done;
}

/*
 * ZUC-128 EEA3 submit entry point.
 * Forwards to the shared EEA3 submit routine with the ZUC_128 variant.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *SUBMIT_JOB_ZUC128_EEA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job)
{
    const ZUC_TYPE variant = ZUC_128;

    return zuc_mb_mgr_submit_eea3_job(state, job, variant);
}

/*
 * ZUC-256 EEA3 submit entry point.
 * Forwards to the shared EEA3 submit routine with the ZUC_256 variant.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *SUBMIT_JOB_ZUC256_EEA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job)
{
    const ZUC_TYPE variant = ZUC_256;

    return zuc_mb_mgr_submit_eea3_job(state, job, variant);
}

/*
 * ZUC-128 EEA3 flush entry point.
 * Forwards to the shared EEA3 flush routine with the ZUC_128 variant.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *FLUSH_JOB_ZUC128_EEA3(MB_MGR_ZUC_OOO *state)
{
    const ZUC_TYPE variant = ZUC_128;

    return zuc_mb_mgr_flush_eea3_job(state, variant);
}

/*
 * ZUC-256 EEA3 flush entry point.
 * Forwards to the shared EEA3 flush routine with the ZUC_256 variant.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *FLUSH_JOB_ZUC256_EEA3(MB_MGR_ZUC_OOO *state)
{
    const ZUC_TYPE variant = ZUC_256;

    return zuc_mb_mgr_flush_eea3_job(state, variant);
}

/*
 * ZUC-128 EIA3 submit entry point.
 * Forwards to the shared EIA3 submit routine with the ZUC_128 variant and a
 * fixed tag size argument of 4.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *SUBMIT_JOB_ZUC128_EIA3(MB_MGR_ZUC_OOO *state, IMB_JOB *job)
{
    const ZUC_TYPE variant = ZUC_128;
    const uint64_t fixed_tag_sz = 4;

    return zuc_mb_mgr_submit_eia3_job(state, job, variant, fixed_tag_sz);
}

/*
 * ZUC-256 EIA3 submit entry point.
 * Forwards to the shared EIA3 submit routine with the ZUC_256 variant and
 * the caller-supplied tag size.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *SUBMIT_JOB_ZUC256_EIA3(MB_MGR_ZUC_OOO *state,
                                IMB_JOB *job,
                                const uint64_t tag_sz)
{
    const ZUC_TYPE variant = ZUC_256;

    return zuc_mb_mgr_submit_eia3_job(state, job, variant, tag_sz);
}

/*
 * ZUC-128 EIA3 flush entry point.
 * Forwards to the shared EIA3 flush routine with the ZUC_128 variant and a
 * fixed tag size argument of 4.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *FLUSH_JOB_ZUC128_EIA3(MB_MGR_ZUC_OOO *state)
{
    const ZUC_TYPE variant = ZUC_128;
    const uint64_t fixed_tag_sz = 4;

    return zuc_mb_mgr_flush_eia3_job(state, variant, fixed_tag_sz);
}

/*
 * ZUC-256 EIA3 flush entry point.
 * Forwards to the shared EIA3 flush routine with the ZUC_256 variant and
 * the caller-supplied tag size.
 *
 * @return whatever the shared routine returns (a completed job or NULL)
 */
IMB_JOB *FLUSH_JOB_ZUC256_EIA3(MB_MGR_ZUC_OOO *state, const uint64_t tag_sz)
{
    const ZUC_TYPE variant = ZUC_256;

    return zuc_mb_mgr_flush_eia3_job(state, variant, tag_sz);
}
